Diet (dis)-similarity

Author

Max Lindmark

Published

2025-01-12

Load packages

home <- here::here()

# Load libraries
library(tidyverse)
library(tidylog)
library(RCurl)
library(gllvm)
library(RColorBrewer)
library(ggrepel)
library(vegan)
library(patchwork)
library(RCurl)
library(devtools)
library(ggpubr)
library(janitor)
library(brms)
library(modelr)
library(tidybayes)
library(ggstats)
library(broom)
library(broom.mixed)

# Source map-plot
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")
Reading layer `StatRec_map_Areas_Full_20170124' from data source 
  `/Users/maxlindmark/Dropbox/Max work/R/cod-interactions/data/shapefiles/ICES-StatRec-mapto-ICES-Areas/StatRec_map_Areas_Full_20170124.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 11074 features and 17 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -44 ymin: 36 xmax: 69 ymax: 85
Geodetic CRS:  WGS 84
#source(paste0(home, "R/functions/map-plot.R"))

# Source code for rotating ordination plot (to make it work in ggplot)
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/rotate.R")
#source(paste0(home, "R/functions/rotate.R"))

Read data

d <- read_csv(paste0(home, "/data/clean/aggregated_stomach_data.csv"))

Diet similarity using bar plots and ordination

Ordination using gllvm

d2 <- d |>
  dplyr::select(ends_with("_tot"))

d2$pred_id <- d$pred_id

d2 <- d2 |>
  left_join(d |> dplyr::select(pred_length_cm, pred_weight_g, pred_id, species), by = "pred_id")
left_join: added 3 columns (pred_length_cm, pred_weight_g, species)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     9,864
           >                 =======
           > rows total       9,864
# Calculate relative prey weight and average by size-class
d3 <- d2 |> 
  pivot_longer(ends_with("_tot")) |> 
  mutate(value = value/pred_weight_g) |> 
  filter(pred_length_cm >= 10 & pred_length_cm < 50) |> # These are the sizes we work with
  mutate(pred_length_cm = cut_width(pred_length_cm, 2),
         pred_length_cm = str_remove(pred_length_cm, "[(]")) |> 
  separate_wider_delim(pred_length_cm, delim = ",", names = c("pred_length_cm", "scrap")) |> 
  mutate(pred_length_cm = ifelse(pred_length_cm == "[9", "9", pred_length_cm),
         pred_length_cm = as.numeric(pred_length_cm)) |> 
  dplyr::select(-scrap) |> 
  group_by(species, pred_length_cm, name) |> 
  summarise(tot_prey_weight = mean(value)) |>
  ungroup() |> 
  mutate(name = str_replace(name, "_", " "),
         name = str_replace(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name)) |> 
  filter(!(species == "Flounder" & pred_length_cm %in% c(45, 43, 39, 9)))
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9864x19, now 147960x6]
filter: removed 7,515 rows (5%), 140,445 rows remaining
summarise: now 570 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
filter: removed 60 rows (11%), 510 rows remaining
#Sample size per 2 cm class?
d3 |> filter(species == "Flounder") |> distinct(pred_length_cm) |> arrange(pred_length_cm)
filter: removed 300 rows (59%), 210 rows remaining
distinct: removed 196 rows (93%), 14 rows remaining
# A tibble: 14 × 1
   pred_length_cm
            <dbl>
 1             11
 2             13
 3             15
 4             17
 5             19
 6             21
 7             23
 8             25
 9             27
10             29
11             31
12             33
13             35
14             37
d2 |> pivot_longer(ends_with("_tot")) |> summarise(n = n(), .by = pred_id) |> distinct(n)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9864x19, now 147960x6]
summarise: now 9,864 rows and 2 columns, ungrouped
distinct: removed 9,863 rows (>99%), one row remaining
# A tibble: 1 × 1
      n
  <int>
1    15
t <- d2 |>
  pivot_longer(ends_with("_tot")) |>
  mutate(value = value/pred_weight_g) |>
  filter(pred_length_cm >= 10 & pred_length_cm <= 50) |> # These are the sizes we work with
  mutate(pred_length_cm = cut_width(pred_length_cm, 2),
         pred_length_cm = str_remove(pred_length_cm, "[(]")) |>
  separate_wider_delim(pred_length_cm, delim = ",", names = c("pred_length_cm", "scrap")) |>
  mutate(pred_length_cm = ifelse(pred_length_cm == "[9", "9", pred_length_cm),
         pred_length_cm = as.numeric(pred_length_cm)) |>
  dplyr::select(-scrap) |>
  filter(!(species == "Flounder" & pred_length_cm %in% c(45, 43, 39, 9))) |>
  summarise(n = n(), .by = c(species, pred_length_cm)) |>
  mutate(n = n/15) |>
  arrange(n)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9864x19, now 147960x6]
filter: removed 6,915 rows (5%), 141,045 rows remaining
filter: removed 120 rows (<1%), 140,925 rows remaining
summarise: now 35 rows and 3 columns, ungrouped
t
# A tibble: 35 × 3
   species  pred_length_cm     n
   <chr>             <dbl> <dbl>
 1 Flounder             11    17
 2 Flounder             37    18
 3 Cod                  49    40
 4 Flounder             13    59
 5 Flounder             35    73
 6 Cod                  47    89
 7 Cod                  11   102
 8 Cod                  45   105
 9 Cod                  13   107
10 Cod                   9   107
# ℹ 25 more rows
summary(t)
   species          pred_length_cm       n        
 Length:35          Min.   : 9     Min.   : 17.0  
 Class :character   1st Qu.:18     1st Qu.:107.0  
 Mode  :character   Median :27     Median :287.0  
                    Mean   :27     Mean   :268.4  
                    3rd Qu.:35     3rd Qu.:412.0  
                    Max.   :49     Max.   :508.0  
ggplot(t, aes(pred_length_cm, n, color = species)) +
  geom_point()

d3 |> filter(tot_prey_weight > 0) |> arrange(tot_prey_weight)
filter: removed 116 rows (23%), 394 rows remaining
# A tibble: 394 × 4
   species  pred_length_cm name            tot_prey_weight
   <chr>             <dbl> <chr>                     <dbl>
 1 Cod                  35 Non Bio             0.000000199
 2 Cod                  47 Amphipoda           0.000000251
 3 Cod                  45 Polychaeta          0.000000261
 4 Cod                  41 Bivalvia            0.000000340
 5 Flounder             33 Mysidae             0.000000366
 6 Cod                  43 Amphipoda           0.000000371
 7 Cod                  43 Polychaeta          0.000000480
 8 Cod                  45 Bivalvia            0.000000833
 9 Flounder             25 Clupea Harengus     0.00000102 
10 Cod                  47 Bivalvia            0.00000137 
# ℹ 384 more rows
d_wide <- d3 |> pivot_wider(names_from = name, values_from = tot_prey_weight)
pivot_wider: reorganized (name, tot_prey_weight) into (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) [was 510x4, now 34x17]
d_mat <- d_wide |>
  dplyr::select(-species, -pred_length_cm) |>
  as.matrix()

str(d_mat)
 num [1:34, 1:15] 0.000329 0.000684 0.000535 0.000535 0.0004 ...
 - attr(*, "dimnames")=List of 2
  ..$ : NULL
  ..$ : chr [1:15] "Amphipoda" "Bivalvia" "Clupea Harengus" "Clupeidae" ...
# Fit gllvm model with two latend variables and no covariates, meaning it becomes an ordination
set.seed(999)

fit <- gllvm(y = d_mat, family = "tweedie", num.lv = 2)
Note that, the tweedie family is implemented using the extended variational approximation method. 
fit
Call: 
gllvm(y = d_mat, family = "tweedie", num.lv = 2)
family: 
[1] "tweedie"
method: 
[1] "VA"

log-likelihood:  2503.932 
Residual degrees of freedom:  451 
AIC:  -4889.864 
AICc:  -4874.13 
BIC:  -4640.033 
# Let's do a ggplot-residual instead
res <- residuals(fit)
p <- NCOL(fit$y)
sppind <- 1:p
ds_res <- res$residuals[, sppind]

# Compare with built-in plot from gllvm: plot(fit, which = 2)
ds_res |> 
  as.data.frame() |> 
  pivot_longer(cols = everything()) |> 
  ggplot(aes(sample = value)) + 
  geom_qq(size = 0.8) + 
  geom_qq_line() + 
  labs(y = "Sample quantiles",
       x = "Theoretical quantiles")
pivot_longer: reorganized (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) into (name, value) [was 34x15, now 510x2]

ggsave(paste0(home, "/figures/supp/qq_ordi_gllvm.pdf"), width = 11, height = 11, units = "cm")

# Plot with GGplot
# Now make the ordination plot. We want to use ggplot, so need to hack it a bit
# See this thread: https://github.com/JenniNiku/gllvm/discussions/116
# ordiplot(fit, biplot = TRUE)

# Extract site scores
LVs = as.data.frame(getLV(fit))

# Bind LV site scores to metadata
# LVs = cbind(LVs, sim.meta)
# 
# ordiplot(fittw, biplot = TRUE)
# 
# ordiplot(fittw, biplot = TRUE, symbols = TRUE, spp.colors = "white")
# 
LVs2 <- rotate(fit)
 
# text?
labs <- LVs2$species |>
  as.data.frame() |> 
  rownames_to_column(var = "prey")

dd <- LVs2$sites |>
  as.data.frame() |> 
  bind_cols(d_wide |> dplyr::select(species, pred_length_cm)) |> 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm >= 25, "Large cod", group))

# Alternative palettes for the groups
pal <- (brewer.pal(n = 11, name = "RdYlBu")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 11, name = "RdYlGn")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 8, name = "Dark2")[c(8, 7, 6)])
#pal <- brewer.pal(n = 8, name = "Dark2")[c(2, 7, 6)]

ordi <- ggplot(dd, aes(x = V1, y = V2, color = factor(group, levels = c("Flounder", "Small cod", "Large cod")),
                       size = pred_length_cm)) +
  geom_point(alpha = 0.6) + 
  # stat_ellipse(aes(V1, V2, color = group), 
  #              inherit.aes = FALSE, linewidth = 1, alpha = 0.3) + 
  scale_radius(range = c(0.8, 4)) +
  scale_color_manual(values = pal) +
  scale_fill_manual(values = pal) + 
  labs(size = "Predator length [cm]", shape = "Species", fill = "Group", color = "",
       x = "Latent variable 1", y = "Latent variable 2") +
  geom_label_repel(data = labs, aes(V1, V2, label = prey),
                   color = "grey20", inherit.aes = FALSE, size = 2, alpha = 0.4,
                   box.padding = 0.25) +
  theme_sleek() +
  # coord_cartesian(xlim = c(-3, 3),
  #                 ylim = c(-2.5, 2.5)) +
  xlim(-3.5, 3.5) + 
  # ylim(-3, 3) +
  guides(color = guide_legend(ncol = 4),
         size = guide_legend(ncol = 4)) + 
  theme(legend.position = "bottom",
        legend.box = "vertical",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ordi
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_label_repel()`).

Make a plot of the ontogenetic development of diets

# Calculate relative prey weight and average by size-class
d3 <- d2 |> 
  pivot_longer(ends_with("_tot")) |> 
  mutate(value = value/pred_weight_g) |> 
  group_by(species, pred_length_cm, name) |> 
  summarise(tot_prey_weight = mean(value)) |>
  ungroup() |> 
  mutate(name = str_replace(name, "_", " "),
         name = str_replace(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name)) #|> 
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9864x19, now 147960x6]
summarise: now 1,545 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
  #filter(tot_prey_weight < quantile(tot_prey_weight, probs = 0.995))

max_size_cod <- 65

cod_important_prey <- d3 |>
  filter(species == "Cod") |> 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod -1, pred_length_cm)) |> 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |> 
  group_by(name, predator_length_grp) |>
  summarise(prey_group_tot = sum(tot_prey_weight)) |> 
  ungroup() |> 
  group_by(predator_length_grp) |> 
  mutate(prop = prey_group_tot / sum(prey_group_tot)) |> 
  ungroup() |>
  mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
         max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
         max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size),
         predator = "Cod")
filter: removed 510 rows (33%), 1,035 rows remaining
summarise: now 195 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
max_size_fle <- 40

fle_important_prey <- d3 |>
  filter(species == "Flounder") |> 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle-1, pred_length_cm)) |> 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |> 
  group_by(name, predator_length_grp) |>
  summarise(prey_group_tot = sum(tot_prey_weight)) |> 
  ungroup() |> 
  group_by(predator_length_grp) |> 
  mutate(prop = prey_group_tot / sum(prey_group_tot)) |> 
  ungroup() |>
  mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
         max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
         max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size),
         predator = "Flounder")
filter: removed 1,035 rows (67%), 510 rows remaining
summarise: now 105 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
area_plot <- bind_rows(fle_important_prey, cod_important_prey) |> 
  mutate(prop = replace_na(prop, 0))

n_cat <- nrow(area_plot |> distinct(name))
distinct: removed 285 rows (95%), 15 rows remaining
colourCount <- n_cat
getPalette <- colorRampPalette(brewer.pal(12, "Paired"))
pal2 <- getPalette(colourCount)

area_plot |> distinct(name)
distinct: removed 285 rows (95%), 15 rows remaining
# A tibble: 15 × 1
   name              
   <chr>             
 1 Amphipoda         
 2 Bivalvia          
 3 Clupea Harengus   
 4 Clupeidae         
 5 Gadus Morhua      
 6 Gobiidae          
 7 Mysidae           
 8 Non Bio           
 9 Other             
10 Other Crustacea   
11 Other Pisces      
12 Platichthys Flesus
13 Polychaeta        
14 Saduria Entomon   
15 Sprattus Sprattus 
# Dataframes for geom_text with sample size
n_cod <- d2 |>
  filter(species == "Cod") |> 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod -1, pred_length_cm)) |> 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |> 
  group_by(predator_length_grp) |>
  summarise(n = length(unique(pred_id)))
filter: removed 3,882 rows (39%), 5,982 rows remaining
summarise: now 13 rows and 2 columns, ungrouped
n_fle <- d2 |>
  filter(species == "Flounder") |> 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle-1, pred_length_cm)) |> 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |> 
  group_by(predator_length_grp) |>
  summarise(n = length(unique(pred_id)))
filter: removed 5,982 rows (61%), 3,882 rows remaining
summarise: now 7 rows and 2 columns, ungrouped
n_dat <- bind_rows(n_cod |> mutate(predator = "Cod"), 
                   n_fle |> mutate(predator = "Flounder")) |> 
  mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
         max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
         max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
area_plot <- area_plot |> 
  mutate(name = str_to_sentence(name))

bar_diet <- 
  ggplot(data = area_plot, aes(x = max_size, y = prop, fill = name, color = name)) +
  geom_col(width = 4.3) +
  geom_text(data = n_dat, aes(x = max_size, y = 1.08, label = n), inherit.aes = FALSE,
            size = 0, color = "white") +
  geom_text(data = n_dat, aes(x = max_size, y = 1.04, label = n), inherit.aes = FALSE,
            size = 2) +
  facet_wrap(~predator, scales = "free") +
  scale_fill_manual(values = pal2, name = "") +
  scale_color_manual(values = pal2, name = "") +
  coord_cartesian(expand = 0) +
  scale_x_continuous(breaks = seq(0, 100, 5)) +
  labs(y = "Proportion", x = "Max. predator size in group [cm]") +
  theme(legend.key.size = unit(0.2, 'cm'),
        #legend.text = element_text(face = "italic"),
        legend.position = "bottom",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

Combine plots

bar_diet / ordi + plot_annotation(tag_levels = "a") #+ plot_layout(heights = c(1, 1.6))
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_label_repel()`).

ggsave(paste0(home, "/figures/ordi_diet.pdf"), width = 17, height = 21, units = "cm")
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_label_repel()`).

Schoener’s overlap index

# Calculate relative prey weight and average by size-class
qyear_rect_sum <- d2 |> 
  pivot_longer(ends_with("_tot")) |> 
  filter(pred_length_cm >= 10 & pred_length_cm < 50) |> # These are the sizes we work with
  left_join(d |> dplyr::select(year, quarter, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") |> 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm >= 25, "Large cod", group)) |> 
  group_by(year, quarter, ices_rect, group) |>
  summarise(n = length(unique(pred_id)),
            tot_prey = sum(value)) |> 
  ungroup() |> 
  group_by(year, quarter, ices_rect) |> 
  mutate(min_stom = min(n)) |> 
  ungroup() |>
  mutate(id = paste(year, quarter, ices_rect, group, sep = "_")) |> 
  dplyr::select(-year, -quarter, -ices_rect, -group) |> 
  filter(n > 3)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9864x19, now 147960x6]
filter: removed 7,515 rows (5%), 140,445 rows remaining
left_join: added 6 columns (year, quarter, subdiv, ices_rect, X, …)
           > rows only in x         0
           > rows only in y  (    501)
           > matched rows     140,445
           >                 =========
           > rows total       140,445
summarise: now 386 rows and 6 columns, 3 group variables remaining (year, quarter, ices_rect)
ungroup: no grouping variables
ungroup: no grouping variables
filter: removed 55 rows (14%), 331 rows remaining
diet_prop <- d2 |> 
  pivot_longer(ends_with("_tot")) |> 
  filter(pred_length_cm >= 10 & pred_length_cm < 50) |> # These are the sizes we work with
  left_join(d |> dplyr::select(year, quarter, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") |> 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm >= 25, "Large cod", group)) |> 
  group_by(year, quarter, ices_rect, group, name) |>
  summarise(tot_prey_by_grp = sum(value)) |> 
  ungroup() |>
  mutate(id = paste(year, quarter, ices_rect, group, sep = "_")) |> 
  filter(id %in% unique(qyear_rect_sum$id)) |> 
  left_join(qyear_rect_sum, by = "id") |> 
  mutate(prop_prey = tot_prey_by_grp / tot_prey)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9864x19, now 147960x6]
filter: removed 7,515 rows (5%), 140,445 rows remaining
left_join: added 6 columns (year, quarter, subdiv, ices_rect, X, …)
           > rows only in x         0
           > rows only in y  (    501)
           > matched rows     140,445
           >                 =========
           > rows total       140,445
summarise: now 5,790 rows and 6 columns, 4 group variables remaining (year, quarter, ices_rect, group)
ungroup: no grouping variables
filter: removed 825 rows (14%), 4,965 rows remaining
left_join: added 3 columns (n, tot_prey, min_stom)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     4,965
           >                 =======
           > rows total       4,965
# Calculate overlap by year and ices rect and species group. Make the proportions wide!
# unique(diet_prop$group)
diet_prop_wide <- diet_prop |> 
  dplyr::select(-tot_prey_by_grp, -tot_prey, -id, -n) |> 
  pivot_wider(names_from = group, values_from = prop_prey) |> 
  mutate(abs_f_sc = abs(Flounder - `Small cod`),
         abs_f_lc = abs(Flounder - `Large cod`),
         abs_lc_sc = abs(`Large cod` - `Small cod`)) |> 
  group_by(year, quarter, ices_rect) |> 
  summarise(`Flounder\nSmall cod` = 1 - 0.5*sum(abs_f_sc),
            `Flounder\nLarge cod` = 1 - 0.5*sum(abs_f_lc),
            `Small cod\nLarge cod` = 1 - 0.5*sum(abs_lc_sc))
pivot_wider: reorganized (group, prop_prey) into (Flounder, Large cod, Small cod) [was 4965x7, now 2145x8]
summarise: now 143 rows and 6 columns, 2 group variables remaining (year, quarter)
diet_prop_wide$latitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lat
diet_prop_wide$longitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lon

diet_prop_wide <- sdmTMB::add_utm_columns(diet_prop_wide)
Warning: Multiple UTM zones detected.
Proceeding with the most common value.
You may wish to choose a different projection.
Proceeding with UTM zone 33N; CRS = 32633.
Visit https://epsg.io/32633 to verify.
ovr <- diet_prop_wide |>
  pivot_longer(c(`Flounder\nSmall cod`, `Flounder\nLarge cod`, `Small cod\nLarge cod`),
               names_to = "overlap_group", values_to = "overlap") |> 
  drop_na(overlap)
pivot_longer: reorganized (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) into (overlap_group, overlap) [was 143x10, now 429x9]
drop_na (grouped): removed 170 rows (40%), 259 rows remaining
summary(ovr$overlap)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.02065 0.08108 0.16411 0.24113 0.95023 
# Plot diet overlap
# plot_map +
#   geom_sf(color = "gray80") + 
#   geom_point(data = ovr, aes(X*1000, Y*1000, color = overlap),
#              size = 10) + 
#   viridis::scale_color_viridis() +
#   geom_sf() + 
#   facet_wrap(~overlap_group, ncol = 2) + 
#   NULL
  
set.seed(99)
ps <- ggplot(ovr, aes(overlap_group, overlap, color = ices_rect)) + 
  geom_jitter(height = 0, width = 0.1, alpha = 0.3) + 
  geom_boxplot(aes(overlap_group, overlap),
               inherit.aes = FALSE, fill = NA, width = 0.2, alpha = 0.2, size = 0.6) + 
  viridis::scale_color_viridis(discrete = TRUE, name = "ICES\nrectangle") + 
  geom_hline(yintercept = 0.6, linetype = 2, alpha = 0.6) + 
  labs(y = "Schoener's overlap index",
       x = "") + 
  theme(legend.position = "bottom",
        legend.key.size = unit(0.01, "cm"),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ps

# summary(ovr$overlap)
# 
ovr |> filter(overlap == 0)
filter (grouped): removed 254 rows (98%), 5 rows remaining
# A tibble: 5 × 9
# Groups:   year, quarter [4]
   year quarter ices_rect latitude longitude     X     Y overlap_group   overlap
  <dbl>   <dbl> <chr>        <dbl>     <dbl> <dbl> <dbl> <chr>             <dbl>
1  2018       1 42G6          56.8      16.5  592. 6291. "Flounder\nLar…       0
2  2021       1 42G6          56.8      16.5  592. 6291. "Small cod\nLa…       0
3  2021       4 42G6          56.8      16.5  592. 6291. "Flounder\nLar…       0
4  2022       4 42G6          56.8      16.5  592. 6291. "Flounder\nLar…       0
5  2022       4 45G9          58.2      19.5  764. 6465. "Flounder\nLar…       0

How does the diet overlap relate to biomass density of the species?

## Read in trawl data. There's one file for cod and one for flounder
# They go up to 2020 Q1
# trawl_surveys_cod <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv", sep = ";")
# trawl_surveys_fle <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv", sep = ";")

# See 01-collate-stomach-data.qmd
af <- 0.01957789
bf <- 2.832485
ac <- 0.009149862
bc <- 2.987961

trawl_surveys_cod <- read.csv2(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv"), sep = ";")
trawl_surveys_fle <- read.csv2(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv"), sep = ";")

# Combine for the two species, filter and clean!
trawl_data <- rbind(trawl_surveys_cod, trawl_surveys_fle) |>
  clean_names() |>
  as.data.frame() |> 
  mutate(group = ifelse(lengthcl >= 100 & lengthcl < 250 & species == "cod", "small_cod", NA),
         group = ifelse(lengthcl >= 250 & lengthcl < 500 & species == "cod", "large_cod", group),
         group = ifelse(lengthcl >= 100 & species == "flounder", "flounder", group))

trawl_data_cod <- trawl_data |> 
  filter(!validity == "I") |> 
  filter(species == "cod") |> 
  mutate(no = no_hour * (duration / 60),
         weight = (ac*(lengthcl/10)^bc) / 1000,
         kg_per_l = no * weight,
         kg_per_l_km2 = kg_per_l/swept_area) |> 
  summarise(kg_km2 = mean(kg_per_l_km2), .by = c(year, quarter, ices, group)) |> 
  filter(group %in% c("small_cod", "large_cod"))
filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 7,290 rows (44%), 9,183 rows remaining
summarise: now 539 rows and 5 columns, ungrouped
filter: removed 193 rows (36%), 346 rows remaining
trawl_data_fle <- trawl_data |> 
  filter(!validity == "I") |> 
  filter(species == "flounder") |> 
  mutate(no = no_hour * (duration / 60),
         weight = (af*(lengthcl/10)^bf) / 1000,
         kg_per_l = no * weight,
         kg_per_l_km2 = kg_per_l/swept_area) |> 
  summarise(kg_km2 = mean(kg_per_l_km2), .by = c(year, quarter, ices, group)) |> 
  filter(group %in% c("flounder"))
filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 9,183 rows (56%), 7,290 rows remaining
summarise: now 284 rows and 5 columns, ungrouped
filter: removed 97 rows (34%), 187 rows remaining
trawl_data_avg <- bind_rows(trawl_data_cod, trawl_data_fle) |> 
  pivot_wider(names_from = "group", values_from = "kg_km2") |> 
  mutate(across(
    c(small_cod, large_cod, flounder),
    ~replace_na(., 0)
  )) |> 
  rename(ices_rect = ices)
pivot_wider: reorganized (group, kg_km2) into (small_cod, large_cod, flounder) [was 533x5, now 190x6]
rename: renamed one variable (ices_rect)

Read and join the biological data

# Join into diet data
ovr <- ovr |> 
  pivot_wider(names_from = overlap_group, values_from = overlap) |> 
  drop_na() |> 
  left_join(trawl_data_avg, by = c("year", "quarter", "ices_rect"))
pivot_wider: reorganized (overlap_group, overlap) into (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) [was 259x9, now 113x10]
drop_na (grouped): removed 40 rows (35%), 73 rows remaining
left_join: added 3 columns (small_cod, large_cod, flounder)
           > rows only in x     0
           > rows only in y  (117)
           > matched rows      73
           >                 =====
           > rows total        73

Fit zero inflated beta regressions using brms

# https://www.andrewheiss.com/blog/2021/11/08/beta-regression-guide/#zero-inflated-beta-regression-bayesian-style

# Full model
ovr <- ovr |>
  ungroup() |> 
  clean_names() |> 
  pivot_longer(c("flounder_small_cod", "flounder_large_cod", "small_cod_large_cod")) |> 
  mutate(value = ifelse(value < 1e-15, 0, value),
         mean_biom_sc = as.numeric(scale(log(flounder + small_cod + large_cod) / 3)),
         ) #|> 
ungroup: no grouping variables
pivot_longer: reorganized (flounder_small_cod, flounder_large_cod, small_cod_large_cod) into (name, value) [was 73x13, now 219x12]
  # Scale densities before averaging?
  # mutate(mean_scod2 = log(mean_scod)/max(log(mean_scod)),
  #        mean_fle2 = log(mean_fle)/max(log(mean_fle)),
  #        mean_mcod2 = log(mean_mcod)/max(log(mean_mcod)),
  #        mean_biom_sc2 = as.numeric(scale(mean_scod2 + mean_fle2 + mean_mcod2) / 3))
  
# ggplot(ovr, aes(mean_biom_sc, mean_biom_sc2)) + 
#   geom_point()
# cor(ovr$mean_biom_sc, ovr$mean_biom_sc2)

write_csv(ovr, paste0(paste0(home, "/data/clean/s_overlap.csv")))

zib_model <- bf(
  value ~ name*mean_biom_sc,
  phi ~ name,
  zi ~ name,
  family = zero_inflated_beta()
)

fit <- brm(
  formula = zib_model,
  data = ovr,
  cores = 4,
  chains = 4,
  iter = 4000
)
Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 16.0.0 (clang-1600.0.26.6)’
using SDK: ‘MacOSX15.2.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
  679 | #include <cmath>
      |          ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling
fit
 Family: zero_inflated_beta 
  Links: mu = logit; phi = log; zi = logit 
Formula: value ~ name * mean_biom_sc 
         phi ~ name
         zi ~ name
   Data: ovr (Number of observations: 219) 
  Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
         total post-warmup draws = 8000

Population-Level Effects: 
                                     Estimate Est.Error l-95% CI u-95% CI Rhat
Intercept                               -2.64      0.14    -2.92    -2.35 1.00
phi_Intercept                            2.27      0.19     1.88     2.63 1.00
zi_Intercept                            -3.63      0.74    -5.25    -2.42 1.00
nameflounder_small_cod                   0.95      0.19     0.57     1.31 1.00
namesmall_cod_large_cod                  1.69      0.20     1.30     2.07 1.00
mean_biom_sc                             0.04      0.11    -0.18     0.25 1.00
nameflounder_small_cod:mean_biom_sc      0.06      0.15    -0.24     0.37 1.00
namesmall_cod_large_cod:mean_biom_sc    -0.35      0.17    -0.69    -0.01 1.00
phi_nameflounder_small_cod              -0.50      0.25    -1.00     0.00 1.00
phi_namesmall_cod_large_cod             -1.32      0.24    -1.80    -0.83 1.00
zi_nameflounder_small_cod               -3.74      3.11   -11.78     0.49 1.00
zi_namesmall_cod_large_cod              -3.76      3.22   -12.19     0.54 1.00
                                     Bulk_ESS Tail_ESS
Intercept                                4004     4664
phi_Intercept                            4021     4773
zi_Intercept                             8867     5000
nameflounder_small_cod                   4601     5047
namesmall_cod_large_cod                  5039     5616
mean_biom_sc                             4647     4910
nameflounder_small_cod:mean_biom_sc      5467     5461
namesmall_cod_large_cod:mean_biom_sc     5785     5784
phi_nameflounder_small_cod               4833     5122
phi_namesmall_cod_large_cod              4876     5101
zi_nameflounder_small_cod                3813     2789
zi_namesmall_cod_large_cod               3965     2546

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# https://mc-stan.org/misc/warnings.html
tibble(rhat = rhat(fit),
       par = names(rhat(fit))) |> 
  arrange(desc(rhat)) |> 
  arrange(desc(rhat)) |> 
  as.data.frame()
        rhat                                    par
1  1.0011856                                   lp__
2  1.0009022                         b_mean_biom_sc
3  1.0008360            b_zi_nameflounder_small_cod
4  1.0005683 b_namesmall_cod_large_cod:mean_biom_sc
5  1.0005005              b_namesmall_cod_large_cod
6  1.0004599           b_phi_nameflounder_small_cod
7  1.0003747                                 lprior
8  1.0002868  b_nameflounder_small_cod:mean_biom_sc
9  1.0002232                            b_Intercept
10 1.0000061               b_nameflounder_small_cod
11 0.9999951          b_phi_namesmall_cod_large_cod
12 0.9999808                        b_phi_Intercept
13 0.9999250                         b_zi_Intercept
14 0.9997404           b_zi_namesmall_cod_large_cod
zi_intercept <- tidy(fit, effects = "fixed") |> 
  filter(component == "zi") |> 
  pull(estimate) 
Warning in tidy.brmsfit(fit, effects = "fixed"): some parameter names contain
underscores: term naming may be unreliable!
filter: removed 9 rows (75%), 3 rows remaining
# Transformed to a probability/proportion
plogis(zi_intercept)*100 # percent
              b_zi_Intercept  b_zi_nameflounder_small_cod 
                    2.585606                     2.324283 
b_zi_namesmall_cod_large_cod 
                    2.282827 
# Compare with data
ovr |> 
  count(value == 0) |> 
  mutate(prop = n / sum(n) *100)
count: now 2 rows and 2 columns, ungrouped
# A tibble: 2 × 3
  `value == 0`     n   prop
  <lgl>        <int>  <dbl>
1 FALSE          217 99.1  
2 TRUE             2  0.913
get_variables(fit)
 [1] "b_Intercept"                           
 [2] "b_phi_Intercept"                       
 [3] "b_zi_Intercept"                        
 [4] "b_nameflounder_small_cod"              
 [5] "b_namesmall_cod_large_cod"             
 [6] "b_mean_biom_sc"                        
 [7] "b_nameflounder_small_cod:mean_biom_sc" 
 [8] "b_namesmall_cod_large_cod:mean_biom_sc"
 [9] "b_phi_nameflounder_small_cod"          
[10] "b_phi_namesmall_cod_large_cod"         
[11] "b_zi_nameflounder_small_cod"           
[12] "b_zi_namesmall_cod_large_cod"          
[13] "lprior"                                
[14] "lp__"                                  
[15] "accept_stat__"                         
[16] "stepsize__"                            
[17] "treedepth__"                           
[18] "n_leapfrog__"                          
[19] "divergent__"                           
[20] "energy__"                              
plot(fit)

# https://discourse.mc-stan.org/t/trying-to-understand-default-prior-for-the-hurdle-part/24337
priors <- prior_summary(fit)
priors
                prior     class                                 coef group resp
               (flat)         b                                                
               (flat)         b                         mean_biom_sc           
               (flat)         b               nameflounder_small_cod           
               (flat)         b  nameflounder_small_cod:mean_biom_sc           
               (flat)         b              namesmall_cod_large_cod           
               (flat)         b namesmall_cod_large_cod:mean_biom_sc           
               (flat)         b                                                
               (flat)         b               nameflounder_small_cod           
               (flat)         b              namesmall_cod_large_cod           
               (flat)         b                                                
               (flat)         b               nameflounder_small_cod           
               (flat)         b              namesmall_cod_large_cod           
 student_t(3, 0, 2.5) Intercept                                                
 student_t(3, 0, 2.5) Intercept                                                
       logistic(0, 1) Intercept                                                
 dpar nlpar lb ub       source
                       default
                  (vectorized)
                  (vectorized)
                  (vectorized)
                  (vectorized)
                  (vectorized)
  phi                  default
  phi             (vectorized)
  phi             (vectorized)
   zi                  default
   zi             (vectorized)
   zi             (vectorized)
                       default
  phi                  default
   zi                  default
stancode(fit)
// generated with brms 2.20.4
functions {
  /* zero-inflated beta log-PDF of a single response
   * Args:
   *   y: the response value
   *   mu: mean parameter of the beta distribution
   *   phi: precision parameter of the beta distribution
   *   zi: zero-inflation probability
   * Returns:
   *   a scalar to be added to the log posterior
   */
   real zero_inflated_beta_lpdf(real y, real mu, real phi, real zi) {
     row_vector[2] shape = [mu * phi, (1 - mu) * phi];
     if (y == 0) {
       return bernoulli_lpmf(1 | zi);
     } else {
       return bernoulli_lpmf(0 | zi) +
              beta_lpdf(y | shape[1], shape[2]);
     }
   }
  /* zero-inflated beta log-PDF of a single response
   * logit parameterization of the zero-inflation part
   * Args:
   *   y: the response value
   *   mu: mean parameter of the beta distribution
   *   phi: precision parameter of the beta distribution
   *   zi: linear predictor for zero-inflation part
   * Returns:
   *   a scalar to be added to the log posterior
   */
   real zero_inflated_beta_logit_lpdf(real y, real mu, real phi, real zi) {
     row_vector[2] shape = [mu * phi, (1 - mu) * phi];
     if (y == 0) {
       return bernoulli_logit_lpmf(1 | zi);
     } else {
       return bernoulli_logit_lpmf(0 | zi) +
              beta_lpdf(y | shape[1], shape[2]);
     }
   }
  // zero-inflated beta log-CCDF and log-CDF functions
  real zero_inflated_beta_lccdf(real y, real mu, real phi, real zi) {
    row_vector[2] shape = [mu * phi, (1 - mu) * phi];
    return bernoulli_lpmf(0 | zi) + beta_lccdf(y | shape[1], shape[2]);
  }
  real zero_inflated_beta_lcdf(real y, real mu, real phi, real zi) {
    return log1m_exp(zero_inflated_beta_lccdf(y | mu, phi, zi));
  }
}
data {
  int<lower=1> N;  // total number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int<lower=1> Kc;  // number of population-level effects after centering
  int<lower=1> K_phi;  // number of population-level effects
  matrix[N, K_phi] X_phi;  // population-level design matrix
  int<lower=1> Kc_phi;  // number of population-level effects after centering
  int<lower=1> K_zi;  // number of population-level effects
  matrix[N, K_zi] X_zi;  // population-level design matrix
  int<lower=1> Kc_zi;  // number of population-level effects after centering
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  matrix[N, Kc_phi] Xc_phi;  // centered version of X_phi without an intercept
  vector[Kc_phi] means_X_phi;  // column means of X_phi before centering
  matrix[N, Kc_zi] Xc_zi;  // centered version of X_zi without an intercept
  vector[Kc_zi] means_X_zi;  // column means of X_zi before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
  for (i in 2:K_phi) {
    means_X_phi[i - 1] = mean(X_phi[, i]);
    Xc_phi[, i - 1] = X_phi[, i] - means_X_phi[i - 1];
  }
  for (i in 2:K_zi) {
    means_X_zi[i - 1] = mean(X_zi[, i]);
    Xc_zi[, i - 1] = X_zi[, i] - means_X_zi[i - 1];
  }
}
parameters {
  vector[Kc] b;  // regression coefficients
  real Intercept;  // temporary intercept for centered predictors
  vector[Kc_phi] b_phi;  // regression coefficients
  real Intercept_phi;  // temporary intercept for centered predictors
  vector[Kc_zi] b_zi;  // regression coefficients
  real Intercept_zi;  // temporary intercept for centered predictors
}
transformed parameters {
  real lprior = 0;  // prior contributions to the log posterior
  lprior += student_t_lpdf(Intercept | 3, 0, 2.5);
  lprior += student_t_lpdf(Intercept_phi | 3, 0, 2.5);
  lprior += logistic_lpdf(Intercept_zi | 0, 1);
}
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N] mu = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] phi = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] zi = rep_vector(0.0, N);
    mu += Intercept + Xc * b;
    phi += Intercept_phi + Xc_phi * b_phi;
    zi += Intercept_zi + Xc_zi * b_zi;
    mu = inv_logit(mu);
    phi = exp(phi);
    for (n in 1:N) {
      target += zero_inflated_beta_logit_lpdf(Y[n] | mu[n], phi[n], zi[n]);
    }
  }
  // priors including constants
  target += lprior;
}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
  // actual population-level intercept
  real b_phi_Intercept = Intercept_phi - dot_product(means_X_phi, b_phi);
  // actual population-level intercept
  real b_zi_Intercept = Intercept_zi - dot_product(means_X_zi, b_zi);
}
pp_check(fit) +
  coord_cartesian(expand = TRUE) +
  labs(x = "Schoener's overlap index",
       y = "Density") + 
  theme(legend.position.inside = c(0.8, 0.8)) + 
  guides(color = guide_legend(position = "inside"))
Using 10 posterior draws for ppc type 'dens_overlay' by default.
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.

ggsave(paste0(home, "/figures/supp/schoener_zib_pp_check.pdf"), width = 11, height = 11, units = "cm")

conditional_effects(fit)

set.seed(123)

ovr <- ovr |> 
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2))

p1 <- ovr |>
  data_grid(name = unique(name),
            mean_biom_sc = 0) |>
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) |> 
  add_epred_draws(fit) |>
  ggplot(aes(.epred, name2)) +
  geom_jitter(data = ovr, aes(value, name2, color = name2), height = 0.2, width = 0, alpha = 0.4) +
  stat_pointinterval(.width = c(.66, .95), color = "gray30") +
  geom_vline(xintercept = 0.6, linetype = 2, alpha = 0.6) +
  scale_color_brewer(palette = "Set1") + 
  labs(x = "Schoener's overlap index", y = "", color = "") +
  geom_stripped_rows(color = NA) +
  coord_cartesian(expand = 0, xlim = c(-0.03, 1)) +
  theme(plot.margin = unit(c(0, 0.3, 0, 0), "cm")) +
  guides(color = "none") +
  NULL

pal <- brewer.pal(name = "RdBu", n = 7)

# Check CI's
fit |>
  spread_draws(b_mean_biom_sc,
               `b_nameflounder_small_cod:mean_biom_sc`,
               `b_namesmall_cod_large_cod:mean_biom_sc`) |>
  mutate("Flounder\nLarge cod" = b_mean_biom_sc,
         "Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
         "Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
         ) |> 
  summarise(mean = mean(`Small cod\nLarge cod`),
            upr = quantile(`Small cod\nLarge cod`, probs = 0.845))
summarise: now one row and 2 columns, ungrouped
# A tibble: 1 × 2
    mean    upr
   <dbl>  <dbl>
1 -0.317 -0.185
p2 <- fit |>
  spread_draws(b_mean_biom_sc,
               `b_nameflounder_small_cod:mean_biom_sc`,
               `b_namesmall_cod_large_cod:mean_biom_sc`) |>
  mutate("Flounder\nLarge cod" = b_mean_biom_sc,
         "Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
         "Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
         ) |>
  pivot_longer(c(`Flounder\nLarge cod`, `Flounder\nSmall cod`, `Small cod\nLarge cod`),
               names_to = "name2", values_to = "slope") |> 
  #ggplot(aes(slope, `Overlap group`, fill = after_stat(x < 0))) +
  ggplot(aes(slope, name2, fill = name2, color = name2, alpha = after_stat(x < 0))) +
  scale_fill_brewer(palette = "Set1", name = "") +
  scale_color_brewer(palette = "Set1", name = "") + 
  scale_alpha_manual(values = c(0.4, 0.7)) + 
  ggdist::stat_eye() +
  stat_pointinterval(.width = c(.66, .95), color = "gray10") +
  labs(y = "", x = "Slope of biomass density\non the mean Schoener's diet overlap") +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
  theme(legend.position.inside = c(0.15, 0.1)) +
  guides(alpha = guide_legend(position = "inside", override.aes = list(alpha = c(0.2, 0.8))),
         fill = "none", color = "none") +
  geom_stripped_rows(aes(slope, name2, fill = name2),
                     inherit.aes = FALSE) +
  coord_cartesian(expand = 0) +
  NULL
pivot_longer: reorganized (Flounder
Large cod, Flounder
Small cod, Small cod
Large cod) into (name2, slope) [was 8000x9, now 24000x8]
Warning in geom_stripped_rows(aes(slope, name2, fill = name2), inherit.aes =
FALSE): Ignoring unknown aesthetics: x and fill
# Conditional
p3 <- ovr |>
  data_grid(name = unique(name),
            mean_biom_sc = seq_range(mean_biom_sc, n = 101)) |>
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) |> 
  add_epred_draws(fit) |>
  ggplot(aes(x = mean_biom_sc, y = value, fill = name2, color = name2)) +
  stat_lineribbon(aes(y = .epred), .width = c(.95), alpha = 0.6) +
  geom_point(data = ovr, alpha = 0.5) +
  scale_fill_brewer(palette = "Pastel1") +
  scale_color_brewer(palette = "Set1") + 
  theme(legend.position.inside = c(0.90, 0.84)) + 
  labs(y = "Schoener's overlap index", x = "Scaled cod and flounder mean biomass density",
       fill = "", color = "") + 
  guides(fill = guide_legend(position = "inside"))

pp <- (p1 + p2) + plot_layout(axes = "collect")

pp / p3 +
  plot_annotation(tag_levels = "a")

ggsave(paste0(home, "/figures/schoener_zib.pdf"), width = 17, height = 20, units = "cm")

# What's the actual predictions
sum <- ovr |>
  data_grid(name = unique(name),
            mean_biom_sc = 0) |>
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) |> 
  add_epred_draws(fit) |> 
  group_by(name) |> 
  summarise(mean = mean(.epred),
            lwr = quantile(.epred, probs = 0.025),
            upr = quantile(.epred, probs = 0.975)) |> 
  mutate(mean = round(mean, digits = 3),
         lwr = round(lwr, digits = 3),
         upr = round(upr, digits = 3))
summarise: now 3 rows and 4 columns, ungrouped
sum
# A tibble: 3 × 4
  name                 mean   lwr   upr
  <chr>               <dbl> <dbl> <dbl>
1 flounder_large_cod  0.065 0.049 0.085
2 flounder_small_cod  0.155 0.126 0.188
3 small_cod_large_cod 0.279 0.228 0.334
p1 + geom_vline(data = sum, aes(xintercept = mean))

knitr::knit_exit()